******************************************************************************************************************
* PROBABILISTIC BINARY DISABILITY MEASURE
******************************************************************************************************************

/* SET-UP
		All of these model syntaxes have a section at the start that sets the details of the analyses that follow - the idea is that it is easier to change 
		things once than to have to do find-and-replace throughout a series of syntax files.
		We have filled these in with the variables that we used - but you will need to change this to the variables in your data.
		Obviously you also need to load your data here!
*/
// These are the same for all the example syntax files
// NOTE(review): ${user} (used inside outputdir) and ${predictedvar} (used in the 'simulate' call further down) are not defined 
//               anywhere in this file - presumably they are set in a profile/master do-file; confirm before running.
global empvar				"rworknew"				// the binary employment variable (1=working, 0=non-working)
global countryvar			"country"				// the categorical variable for country, with each country denoted by a value
global disvar				"llsiH"					// the binary self-reported general disability variable (1 for has a disability, 0 for does not)
global pweight				"rwtresp2"				// the probability weight used for this survey
global controls				"i.ragey i.rmale" 		// control variables
global outputdir 			"${user}\OneDrive - King's College London\Disability work\ESRC Future Leaders Disability\Phase 1 (Dis Emp Rates) - Intl\ELSA-SHARE-HRS\Outputs"	// the file location to save the output tables to
/* Setting the analysis sample (needs to be done from the outset) 
   - these are the ages and countries used in our analysis - you should set this to whatever you are using. */
// Keep ages 50 to 69 inclusive of 50 and exclusive of 70 (observations with missing ragey fail the '<70' test and are dropped)
keep if (ragey>=50 & ragey<70) 
// Keep one survey year per country: 2015 for countries 19/23/29/34/35, 2014 for country 61, 2010 for country 51, and 2013 otherwise.
// NOTE(review): country 29 is in the 2015 list but NOT in the exclusion list of the 2013 clause, so its 2013 rows are kept as well as its 2015 rows;
//               likewise any 2013 rows for countries 51 and 61 also satisfy the 2013 clause. Confirm this is intended.
keep if (year==2015 & inlist(country,19,23,29,34,35)) | ( (year==2013 & !inlist(country,19,23,34,35)) | (country==61 & year==2014) | (country==51 & year==2010) )	
// Drop observations missing employment, age or sex; missing ${disvar} is only a reason to drop outside countries 51 and 61
drop if missing(${empvar}, ragey, rmale) | (missing(${disvar}) & !inlist(country,51,61) )
// Final things
svyset [pw=${pweight}], strata(country)		// for some commands, it's easier to use the svy prefix than to set weights with [pweight=${pweight}]
// You don't need the extra globals in the predicted disability syntax

	
*__________________________________________________________________________________________________
* 
**# DEFINING THE PROGRAM TO RUN BELOW
*__________________________________________________________________________________________________

/* disemp_sim - one replication of the probabilistic binary disability analysis (designed to be called by 'simulate').

   What it does
		1. Draws a fresh uniform random number per observation (rand1) and sets predictedvar() to 1 where p_predicted > rand1, 
		   0 otherwise (missing where p_predicted is missing).
		2. Fits a logit of empvar() on predictedvar()##countryvar() plus controls(), then uses margins/lincom to get employment 
		   rates by disability status and country, and the employment gap (rate for predictedvar==0 minus rate for predictedvar==1).
		3. Fits a logit of predictedvar() on countryvar() plus controls(), then uses margins to get disability prevalence by country.
		4. Joins everything into a single row vector and posts it as e(b).

   Options
		countryvar(name)	categorical country variable
		disvar(name)		required by the syntax, but not used anywhere in the body of this program
		predictedvar(name)	name of the 0/1 variable that is (re)created in each replication
		empvar(name)		binary employment outcome
		controls(varlist)	optional control variables (factor variables allowed)
		[if] [in] [weights]	restrict the estimation sample / weight both logit models

   Requirements that are NOT passed as options
		- a variable called p_predicted must exist in the data (it is hard-coded below)
		- the global controlsmeans must hold the at() specification used by margins

   Returns
		e(b)	[ n_<predictedvar> (obs_emp, countries_emp, obs_dis) , then for each of b / ll / ul: disgap, disemp, dis ]
		e(N)	number of observations in the employment logit
		e(cmd)	"simulate"

   Side effects
		Drops and recreates the variables rand1 and predictedvar(), and leaves the matrices n_*, disemp_*, disgap_* and dis_* in memory.
*/
capture program drop disemp_sim
program define disemp_sim, eclass 
		syntax [if] [in] [pweight iweight], countryvar(name) disvar(name) predictedvar(name) empvar(name) [controls(varlist fv)]  
		marksample touse					// 0/1 marker of the sample implied by [if] [in] and the weights - see 'help marksample'
		
		****************************************************************************************************
		* PRELIMINARIES
		****************************************************************************************************
		// Creating the predicted disvar for each iteration - weights are constant, but random element needs to change in each bootstrap replication 
		capture drop rand1 
		gen rand1 = runiform()
		capture drop `predictedvar'
		gen 	`predictedvar' = 0 if						 ~missing(p_predicted)
		replace `predictedvar' = 1 if p_predicted  > rand1 & ~missing(p_predicted)
		label var `predictedvar' "Disability IN THIS REPLICATION"

		
		****************************************************************************************************
		* ESTIMATION COMMANDS
		****************************************************************************************************
			
		// Command for DIS EMP GAP
		// FIX: the sample marker must be used as an -if- condition; previously it was listed as a covariate, so [if] [in] were ignored
		logit `empvar' ib(1).`predictedvar'##i.`countryvar' `controls' if `touse' [`weight' `exp']	
			* Samples sizes  in the matrix n_`predictedvar' (e.g. n_predicted), which is outputted from the command below
				matrix n_`predictedvar' =  `e(N)' 														// [1,1] is the number of observations in the employment model
				levelsof `countryvar' if e(sample), local(wavelist_`predictedvar')						// a list of countries, used for loop below
				matrix n_`predictedvar' = n_`predictedvar' , wordcount("`wavelist_`predictedvar''")		// [1,2] is the number of countries in the estimation sample
			* Margins
			margins i.`predictedvar' , over(`countryvar') 	at(${controlsmeans}) post 
				matrix disemp_`predictedvar'_b  = e(b)
				matrix disemp_`predictedvar'_ll = r(table)[rownumb(r(table), "ll"), 1...]
				matrix disemp_`predictedvar'_ul = r(table)[rownumb(r(table), "ul"), 1...]
				* To get CIs for emp gap, need to use lincom as follows
				local i = 0
				local gapcolnames = "" 
				foreach wave in `wavelist_`predictedvar'' 	{
					local i = `i' + 1			// a consecutive counter, rather than the number of the country
					lincom _b[`wave'.`countryvar'#0.`predictedvar'] - _b[`wave'.`countryvar'#1.`predictedvar']
					if `i'==1 {
						// first country starts each matrix
						matrix disgap_`predictedvar'_b  = r(estimate)
						matrix disgap_`predictedvar'_ll = r(lb)
						matrix disgap_`predictedvar'_ul = r(ub)
					}
					else {
						// later countries are appended as extra columns
						matrix disgap_`predictedvar'_b  = disgap_`predictedvar'_b , r(estimate)
						matrix disgap_`predictedvar'_ll = disgap_`predictedvar'_ll, r(lb)
						matrix disgap_`predictedvar'_ul = disgap_`predictedvar'_ul, r(ub)
					}
					local gapcolnames = "`gapcolnames' `wave'.`countryvar'"
				}	
				foreach stat in b ll ul		{
					matrix colnames disgap_`predictedvar'_`stat' = `gapcolnames'
				}
				
		// Command for DIS ITSELF
		// FIX: this previously used an undefined local in place of the sample condition, so the model ran on the unrestricted data
		logit 				`predictedvar'    i.`countryvar' `controls' if `touse' [`weight' `exp'] 
			* Samples sizes  in the matrix n_`predictedvar' (e.g. n_predicted), which is outputted from the command below
				matrix  n_`predictedvar' =  n_`predictedvar', `e(N)'									// [1,3] is the sample size for dis, just as a check
			* Margins
			margins					, over(`countryvar') 	at(${controlsmeans}) post 
				matrix dis_`predictedvar'_b  = e(b)
				matrix dis_`predictedvar'_ll = r(table)[rownumb(r(table), "ll"), 1...]
				matrix dis_`predictedvar'_ul = r(table)[rownumb(r(table), "ul"), 1...]
			
		// Labelling 
		* Labelling the main results
		local fullmatlist = "n_`predictedvar'"			// the comma-separated list of matrices to join; rebuilt from scratch in every replication
		foreach stat in b ll ul		{
			matrix coleq 	disgap_`predictedvar'_`stat' = `predictedvar'_disgap_`stat'
			matrix coleq 	disemp_`predictedvar'_`stat' = `predictedvar'_disemp_`stat'
			matrix coleq 	   dis_`predictedvar'_`stat' = `predictedvar'_dis_`stat'
			local fullmatlist = "`fullmatlist', disgap_`predictedvar'_`stat', disemp_`predictedvar'_`stat', dis_`predictedvar'_`stat'"		// 	The list of all matrices to include in the results
		}
		matrix coleq	     n_`predictedvar'   = n_`predictedvar'
		* Labelling the matrix of sample sizes
		matrix colnames n_`predictedvar' = "obs_emp_`predictedvar'" "countries_emp_`predictedvar'" "obs_dis_`predictedvar'" // labelling the matrix

		****************************************************************************************************
		* OUTPUTTING
		****************************************************************************************************
		tempname simoutput
		matrix `simoutput' = `fullmatlist'
		ereturn post `simoutput' 
		ereturn local cmd="simulate"
		ereturn scalar N = n_`predictedvar'[1,1]		// This is the sample size for the employment regression (sample size for the disability regression is [1,3]) - necessary for bootstrapping, but also useful for simulate
		
end

	
*__________________________________________________________________________________________________
* 
**# RUNNING 'SIMULATE'
*__________________________________________________________________________________________________

// Setting the mean values of control variables, to hold constant for the average marginal effects.
// This builds the global controlsmeans as a list of name=value pairs, which is later passed to margins via at(${controlsmeans}).
// It can't be done when setting the globals at the top, as it depends on the analysis dataset.
global controlsmeans ""
capture svy: mean ${controls} 
local mean_rc = _rc						// stored straight away, so the check below cannot be affected by later commands
if `mean_rc' == 0 {
	// NOTE(review): this assumes the names in e(varlist) are in the same order as the columns of e(b) - confirm for factor-variable controls
	local col = 0						// explicit reset, so re-running this section in the same session starts from the first column
	foreach word in `e(varlist)'	{
		local ++col
		local working = e(b)[1, `col']
		global controlsmeans "${controlsmeans} `word'=`working'"
	}
}
else {
	// Do not loop over e() results here: they would belong to whatever estimation command happened to run before this one
	display as error "svy: mean ${controls} failed (return code `mean_rc') - controlsmeans has been left empty"
}
dis in red "Controls are ${controls} - their means in the atspec are ${controlsmeans}"


// Running the programme
// disemp_sim requires predictedvar(), which is filled from the global predictedvar; that global is not set anywhere in this file, 
// so stop with a clear message if it is empty rather than letting 'simulate' fail with a less obvious error.
if "${predictedvar}" == "" {
	display as error "global predictedvar is not set - define it (the name of the variable to create in each replication) before running simulate"
	exit 198
}
matrix drop _all
svyset [pw=${pweight}], strata(country)
global run = 				"100"			// give a unique identifier to each run you do (this needs to be a number, as it becomes part of the seed below)
global simreps 				"3"				// number of simulation replications for the full sample estimate
											// NOTE(review): the original comment referred to bootstrap replicate weights created "above"; nothing in this file creates them
// Each replication calls disemp_sim once; the replication results are saved to justsim_run<run>.dta and replace the data in memory
simulate , reps(${simreps}) seed(123${run}456) saving("${outputdir}/justsim_run${run}.dta", replace) :  /// 	
	disemp_sim [pweight=${pweight}], countryvar(${countryvar}) controls(${controls}) empvar(${empvar}) disvar(${disvar}) predictedvar(${predictedvar}) 
/* Quick tips when amending this to fit your data:
		Run the disemp_sim program without 'simulate' to test/debug it.
		If the program works, but simulate doesn't, then use the 'noisily' option of simulate to debug it.
*/
	

// Outputting results from the dataset of the results of each replication
// 		NOTE: this formats the results to be the same as from the bootstrap command (see main replication file)
// 		This then needs importing into Excel and turning into a better table (using LOOKUP or MATCH functions)
// For every variable in memory, the mean across replications is stored in the matrix 'output', 
// and a name derived from the variable label is stored in the global rownames.
unab allvars: _all
local i = 1
foreach var in `allvars'	{
	sum `var'
	// r(mean) is used directly rather than macro-expanded: if a column has no non-missing values the expanded macro is empty, which would be a syntax error
	if `i'==1		matrix output = 		r(mean)
		else		matrix output = output, r(mean) 
	local thislab: variable label `var'
	// Turn a label of the form [eq]_b[name] into eq:name, for use as a matrix column name
	local thislab = subinstr(subinstr(subinstr("`thislab'", "]_b[", ":", .), "[", "", .), "]", "", .)
	if "`thislab'"==""	local thislab "`var'"		// fall back to the variable name, so the number of names always matches the number of columns
	if `i'==1		global rownames  "`thislab'"
		else		global rownames  "${rownames} `thislab'"
	local ++i
}
matrix colnames output = $rownames
matrix output = output'
// esttab is part of the user-written estout package; forward slash used in the path for consistency with the saving() path used by simulate
esttab matrix(output) using "${outputdir}/simulate_run${run}.csv", csv replace not nostar nonum nodepvars b(%5.4f) /// 	
	addnotes("Number of replications is ${simreps}, date outputted is `c(current_date)'")
